Geometries for Vector Spatial Data

Code
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon as MplPolygon

# Toy vector data for a GIS-style map: streets are line strings, school
# catchments are polygons, and schools are points.

# Streets: each entry is an ordered list of (x, y) vertices.
streets = [
    [(0, 0), (1, 2), (3, 3)],
    [(3, 3), (5, 2), (7, 5)],
    [(7, 5), (8, 8), (10, 10)],
    [(1, 2), (2, 5), (4, 6)],
]

# School catchments: each entry lists the (x, y) vertices of one polygon.
catchments = [
    [(0.5, 0.5), (2, 1), (1.5, 3), (0.5, 2)],
    [(3, 4), (5, 3.5), (6, 6), (4, 6.5)],
    [(7, 7), (8.5, 7.5), (9, 9), (7.5, 9)],
]

# Schools: one (x, y) point per catchment.
schools = [(1.2, 1.8), (4.7, 5.3), (8.2, 8.2)]

# Draw all three layers on a single square figure.
fig, ax = plt.subplots(figsize=(8, 8))

# Streets: only the first line carries a label so the legend shows one entry.
ax.plot(*zip(*streets[0]), color='blue', linewidth=2, label="Streets")
for street in streets[1:]:
    street_x, street_y = zip(*street)
    ax.plot(street_x, street_y, color='blue', linewidth=2)

# Catchments: semi-transparent orange fill with a black outline.
# Use `facecolor` rather than `color`: `color` sets both face and edge colour,
# which overrides `edgecolor` and makes matplotlib emit a UserWarning.
for i, catchment in enumerate(catchments):
    polygon = MplPolygon(
        catchment,
        closed=True,
        facecolor='orange',
        edgecolor='black',
        alpha=0.5,
    )
    ax.add_patch(polygon)
    if i == 0:
        # Label a single patch so the legend shows one catchment entry.
        polygon.set_label("School Catchments")

# Schools: one scatter call, so one legend entry.
school_x, school_y = zip(*schools)
ax.scatter(school_x, school_y, color='green', s=100, label="Schools")

# Legend, title, and a fixed square extent with equal axis scaling so the
# geometries are not stretched.
ax.legend()
ax.set_title('Vector GIS: Street Network, School Catchments, and Schools')
ax.set_xlim(-1, 11)
ax.set_ylim(-1, 11)
ax.set_aspect('equal')

# Save the figure to a file (before show(), while the figure is still open)
plt.savefig("vector.png", dpi=300)

# Show the plot
plt.show()
/tmp/ipykernel_2058626/3972444099.py:33: UserWarning: Setting the 'color' property will override the edgecolor or facecolor properties.
  polygon = MplPolygon(catchment, closed=True, color='orange', alpha=0.5, edgecolor='black')

Here we are only imitating a GIS: matplotlib is a visualization library, so it can draw these geometries but it cannot perform any spatial analysis on them.

Code
from shapely import Point
Code
school_points = [Point(school) for school in schools]
Code
school_points
[<POINT (1.2 1.8)>, <POINT (4.7 5.3)>, <POINT (8.2 8.2)>]
Code
school_points[0]

Code
school_0, school_1, school_2 = school_points
Code
school_0

Code
school_1

Code
school_2

Code
from shapely import Polygon
Code
catchment_polygons = [Polygon(catchment) for catchment in catchments]
Code
catchment_0, catchment_1, catchment_2 = catchment_polygons
Code
catchment_0

Code
catchment_1

Code
catchment_2

Code
from shapely import LineString
Code
street_lines = [LineString(street) for street in streets]
Code
street_0, street_1, street_2, street_3 = street_lines
Code
street_0

Code
street_1

Code
street_2

Code
street_3

Geometry Types

Code
school_0.geom_type
'Point'
Code
school_2.geom_type
'Point'
Code
school_0.area
0.0
Code
school_0.length
0.0
Code
list(school_0.coords)
[(1.2, 1.8)]
Code
list(school_2.coords)
[(8.2, 8.2)]
Code
school_0.distance(school_2)
9.4847245611035
Code
street_1.geom_type
'LineString'
Code
street_1.area
0.0
Code
street_1.length
5.841619252963779
Code
school_0.distance(street_1)
2.1633307652783933
Code
catchment_0.geom_type
'Polygon'
Code
catchment_0.area
2.375
Code
list(catchment_0.exterior.coords)
[(0.5, 0.5), (2.0, 1.0), (1.5, 3.0), (0.5, 2.0), (0.5, 0.5)]
Code
catchment_0.bounds
(0.5, 0.5, 2.0, 3.0)
Code
school_0.distance(catchment_0)
0.0
Code
catchment_0.contains(school_0)
True
Code
catchment_0.contains(school_1)
False

Geopandas

Code
import geopandas as gpd
Code
streets = gpd.GeoSeries(street_lines)
Code
streets.plot()

Code
catchments = gpd.GeoSeries(catchment_polygons)
Code
catchments.plot()

Code
schools = gpd.GeoSeries(school_points)
Code
schools.plot()

Code
type(schools)
geopandas.geoseries.GeoSeries
Code
dir(schools)
['T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__bool__',
 '__class__',
 '__column_consortium_standard__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__geo_interface__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pandas_priority__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rfloordiv__',
 '__rmatmul__',
 '__rmod__',
 '__rmul__',
 '__ror__',
 '__round__',
 '__rpow__',
 '__rsub__',
 '__rtruediv__',
 '__rxor__',
 '__setattr__',
 '__setitem__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__truediv__',
 '__weakref__',
 '__xor__',
 '_accessors',
 '_accum_func',
 '_agg_examples_doc',
 '_agg_see_also_doc',
 '_align_for_op',
 '_align_frame',
 '_align_series',
 '_append',
 '_arith_method',
 '_as_manager',
 '_attrs',
 '_binop',
 '_can_hold_na',
 '_check_inplace_and_allows_duplicate_labels',
 '_check_is_chained_assignment_possible',
 '_check_label_or_level_ambiguity',
 '_check_setitem_copy',
 '_clear_item_cache',
 '_clip_with_one_bound',
 '_clip_with_scalar',
 '_cmp_method',
 '_consolidate',
 '_consolidate_inplace',
 '_construct_axes_dict',
 '_construct_result',
 '_constructor',
 '_constructor_expanddim',
 '_constructor_expanddim_from_mgr',
 '_constructor_from_mgr',
 '_data',
 '_deprecate_downcast',
 '_dir_additions',
 '_dir_deletions',
 '_drop_axis',
 '_drop_labels_or_levels',
 '_duplicated',
 '_find_valid_index',
 '_flags',
 '_flex_method',
 '_from_mgr',
 '_from_wkb_or_wkt',
 '_get_axis',
 '_get_axis_name',
 '_get_axis_number',
 '_get_axis_resolvers',
 '_get_block_manager_axis',
 '_get_bool_data',
 '_get_cacher',
 '_get_cleaned_column_resolvers',
 '_get_index_resolvers',
 '_get_label_or_level_values',
 '_get_numeric_data',
 '_get_rows_with_mask',
 '_get_value',
 '_get_values_tuple',
 '_get_with',
 '_getitem_slice',
 '_gotitem',
 '_hidden_attrs',
 '_indexed_same',
 '_info_axis',
 '_info_axis_name',
 '_info_axis_number',
 '_init_dict',
 '_init_mgr',
 '_inplace_method',
 '_internal_names',
 '_internal_names_set',
 '_is_cached',
 '_is_copy',
 '_is_label_or_level_reference',
 '_is_label_reference',
 '_is_level_reference',
 '_is_mixed_type',
 '_is_view',
 '_is_view_after_cow_rules',
 '_item_cache',
 '_ixs',
 '_logical_func',
 '_logical_method',
 '_map_values',
 '_maybe_update_cacher',
 '_memory_usage',
 '_metadata',
 '_mgr',
 '_min_count_stat_function',
 '_name',
 '_needs_reindex_multi',
 '_pad_or_backfill',
 '_protect_consolidate',
 '_reduce',
 '_references',
 '_reindex_axes',
 '_reindex_indexer',
 '_reindex_multi',
 '_reindex_with_indexers',
 '_rename',
 '_replace_single',
 '_repr_data_resource_',
 '_repr_latex_',
 '_reset_cache',
 '_reset_cacher',
 '_set_as_cached',
 '_set_axis',
 '_set_axis_name',
 '_set_axis_nocheck',
 '_set_is_copy',
 '_set_labels',
 '_set_name',
 '_set_value',
 '_set_values',
 '_set_with',
 '_set_with_engine',
 '_shift_with_freq',
 '_slice',
 '_stat_function',
 '_stat_function_ddof',
 '_take_with_is_copy',
 '_to_latex_via_styler',
 '_typ',
 '_update_inplace',
 '_validate_dtype',
 '_values',
 '_where',
 '_wrapped_pandas_method',
 'abs',
 'add',
 'add_prefix',
 'add_suffix',
 'affine_transform',
 'agg',
 'aggregate',
 'align',
 'all',
 'any',
 'append',
 'apply',
 'area',
 'argmax',
 'argmin',
 'argsort',
 'array',
 'asfreq',
 'asof',
 'astype',
 'at',
 'at_time',
 'attrs',
 'autocorr',
 'axes',
 'backfill',
 'between',
 'between_time',
 'bfill',
 'bool',
 'boundary',
 'bounds',
 'buffer',
 'build_area',
 'case_when',
 'centroid',
 'clip',
 'clip_by_rect',
 'combine',
 'combine_first',
 'compare',
 'concave_hull',
 'contains',
 'contains_properly',
 'convert_dtypes',
 'convex_hull',
 'copy',
 'corr',
 'count',
 'count_coordinates',
 'count_geometries',
 'count_interior_rings',
 'cov',
 'covered_by',
 'covers',
 'crosses',
 'crs',
 'cummax',
 'cummin',
 'cumprod',
 'cumsum',
 'cx',
 'delaunay_triangles',
 'describe',
 'diff',
 'difference',
 'disjoint',
 'distance',
 'div',
 'divide',
 'divmod',
 'dot',
 'drop',
 'drop_duplicates',
 'droplevel',
 'dropna',
 'dtype',
 'dtypes',
 'duplicated',
 'dwithin',
 'empty',
 'envelope',
 'eq',
 'equals',
 'estimate_utm_crs',
 'ewm',
 'expanding',
 'explode',
 'explore',
 'exterior',
 'extract_unique_points',
 'factorize',
 'ffill',
 'fillna',
 'filter',
 'first',
 'first_valid_index',
 'flags',
 'floordiv',
 'force_2d',
 'force_3d',
 'frechet_distance',
 'from_arrow',
 'from_file',
 'from_wkb',
 'from_wkt',
 'from_xy',
 'ge',
 'geom_almost_equals',
 'geom_equals',
 'geom_equals_exact',
 'geom_type',
 'geometry',
 'get',
 'get_coordinates',
 'get_geometry',
 'get_precision',
 'groupby',
 'gt',
 'has_sindex',
 'has_z',
 'hasnans',
 'hausdorff_distance',
 'head',
 'hilbert_distance',
 'hist',
 'iat',
 'idxmax',
 'idxmin',
 'iloc',
 'index',
 'infer_objects',
 'info',
 'interiors',
 'interpolate',
 'intersection',
 'intersection_all',
 'intersects',
 'is_ccw',
 'is_closed',
 'is_empty',
 'is_monotonic_decreasing',
 'is_monotonic_increasing',
 'is_ring',
 'is_simple',
 'is_unique',
 'is_valid',
 'is_valid_reason',
 'isin',
 'isna',
 'isnull',
 'item',
 'items',
 'keys',
 'kurt',
 'kurtosis',
 'last',
 'last_valid_index',
 'le',
 'length',
 'line_merge',
 'list',
 'loc',
 'lt',
 'make_valid',
 'map',
 'mask',
 'max',
 'mean',
 'median',
 'memory_usage',
 'min',
 'minimum_bounding_circle',
 'minimum_bounding_radius',
 'minimum_clearance',
 'minimum_rotated_rectangle',
 'mod',
 'mode',
 'mul',
 'multiply',
 'name',
 'nbytes',
 'ndim',
 'ne',
 'nlargest',
 'normalize',
 'notna',
 'notnull',
 'nsmallest',
 'nunique',
 'offset_curve',
 'overlaps',
 'pad',
 'pct_change',
 'pipe',
 'plot',
 'polygonize',
 'pop',
 'pow',
 'prod',
 'product',
 'project',
 'quantile',
 'radd',
 'rank',
 'ravel',
 'rdiv',
 'rdivmod',
 'reindex',
 'reindex_like',
 'relate',
 'relate_pattern',
 'remove_repeated_points',
 'rename',
 'rename_axis',
 'reorder_levels',
 'repeat',
 'replace',
 'representative_point',
 'resample',
 'reset_index',
 'reverse',
 'rfloordiv',
 'rmod',
 'rmul',
 'rolling',
 'rotate',
 'round',
 'rpow',
 'rsub',
 'rtruediv',
 'sample',
 'sample_points',
 'scale',
 'searchsorted',
 'segmentize',
 'select',
 'sem',
 'set_axis',
 'set_crs',
 'set_flags',
 'set_precision',
 'shape',
 'shared_paths',
 'shift',
 'shortest_line',
 'simplify',
 'sindex',
 'size',
 'skew',
 'snap',
 'sort_index',
 'sort_values',
 'squeeze',
 'std',
 'struct',
 'sub',
 'subtract',
 'sum',
 'swapaxes',
 'swaplevel',
 'symmetric_difference',
 'tail',
 'take',
 'to_arrow',
 'to_clipboard',
 'to_crs',
 'to_csv',
 'to_dict',
 'to_excel',
 'to_file',
 'to_frame',
 'to_hdf',
 'to_json',
 'to_latex',
 'to_list',
 'to_markdown',
 'to_numpy',
 'to_period',
 'to_pickle',
 'to_sql',
 'to_string',
 'to_timestamp',
 'to_wkb',
 'to_wkt',
 'to_xarray',
 'total_bounds',
 'touches',
 'transform',
 'translate',
 'transpose',
 'truediv',
 'truncate',
 'type',
 'tz_convert',
 'tz_localize',
 'unary_union',
 'union',
 'union_all',
 'unique',
 'unstack',
 'update',
 'value_counts',
 'values',
 'var',
 'view',
 'voronoi_polygons',
 'where',
 'within',
 'x',
 'xs',
 'y',
 'z']
Code
schools_gdf = gpd.GeoDataFrame(geometry=schools)
Code
schools_gdf.head()
geometry
0 POINT (1.2 1.8)
1 POINT (4.7 5.3)
2 POINT (8.2 8.2)
Code
type(schools_gdf)
geopandas.geodataframe.GeoDataFrame
Code
schools_gdf['students'] = [124, 94, 100]
Code
schools_gdf.head()
geometry students
0 POINT (1.2 1.8) 124
1 POINT (4.7 5.3) 94
2 POINT (8.2 8.2) 100
Code
schools_gdf.plot()

Code
schools_gdf.plot(column='students')

Code
streets_gdf = gpd.GeoDataFrame(geometry=streets)
Code
streets_gdf.plot()

Code
streets_gdf['length'] = streets_gdf.length
Code
streets_gdf.head()
geometry length
0 LINESTRING (0 0, 1 2, 3 3) 4.472136
1 LINESTRING (3 3, 5 2, 7 5) 5.841619
2 LINESTRING (7 5, 8 8, 10 10) 5.990705
3 LINESTRING (1 2, 2 5, 4 6) 5.398346
Code
streets_gdf.plot(column='length', legend=True)

Code
catchments_gdf = gpd.GeoDataFrame(geometry=catchment_polygons)
Code
catchments_gdf['area'] = catchments_gdf.area
catchments_gdf.plot(column='area', legend=True)

Southern California Census Tracts

Code
gdf = gpd.read_parquet("~/data/scag_region.parquet")
Code
gdf.shape
(4580, 194)
Code
type(gdf)
geopandas.geodataframe.GeoDataFrame
Code
gdf.head()
geoid n_asian_under_15 n_black_under_15 n_hispanic_under_15 n_native_under_15 n_white_under_15 n_persons_under_18 n_asian_over_60 n_black_over_60 n_hispanic_over_60 ... year n_total_housing_units_sample p_nonhisp_white_persons p_white_over_60 p_black_over_60 p_hispanic_over_60 p_native_over_60 p_asian_over_60 p_disabled geometry
0 06037128702 58.0 0.0 223.0 0.0 475.0 986.0 NaN NaN NaN ... 2010 2903.0 64.726214 NaN NaN NaN NaN NaN NaN POLYGON ((-118.4487 34.16485, -118.43997 34.16...
1 06037131600 83.0 62.0 777.0 0.0 135.0 1355.0 NaN NaN NaN ... 2010 1487.0 28.679979 NaN NaN NaN NaN NaN NaN POLYGON ((-118.56229 34.22033, -118.55792 34.2...
2 06037134104 287.0 17.0 816.0 0.0 61.0 1323.0 NaN NaN NaN ... 2010 1388.0 14.846188 NaN NaN NaN NaN NaN NaN POLYGON ((-118.57976 34.21558, -118.57539 34.2...
3 06037134304 90.0 24.0 298.0 0.0 89.0 520.0 NaN NaN NaN ... 2010 928.0 33.378933 NaN NaN NaN NaN NaN NaN POLYGON ((-118.61472 34.21952, -118.61039 34.2...
4 06037242000 0.0 229.0 681.0 0.0 0.0 1164.0 NaN NaN NaN ... 2010 1054.0 0.058565 NaN NaN NaN NaN NaN NaN POLYGON ((-118.25416 33.93882, -118.25413 33.9...

5 rows × 194 columns

Code
gdf.columns.values
array(['geoid', 'n_asian_under_15', 'n_black_under_15',
       'n_hispanic_under_15', 'n_native_under_15', 'n_white_under_15',
       'n_persons_under_18', 'n_asian_over_60', 'n_black_over_60',
       'n_hispanic_over_60', 'n_native_over_60', 'n_persons_over_60',
       'n_white_over_60', 'n_asian_over_65', 'n_black_over_65',
       'n_hispanic_over_65', 'n_native_over_65', 'n_white_over_65',
       'n_persons_over_75', 'n_persons_over_15', 'n_civilians_over_16',
       'n_civilians_over_18', 'n_persons_over_25', 'n_age_5_older',
       'n_asian_age_distribution', 'n_black_age_distribution',
       'n_hispanic_age_distribution', 'n_native_age_distribution',
       'n_white_age_distribution', 'n_asian_persons', 'n_black_persons',
       'n_chinese_persons', 'n_labor_force', 'n_civilians_16_64',
       'n_edu_college_greater', 'n_cuban_pop',
       'n_poverty_determined_asian', 'n_poverty_determined_black',
       'n_total_pop_sample', 'n_female_over_16',
       'n_poverty_determined_families', 'n_poverty_determined_hispanic',
       'n_disabled', 'n_housing_units_multiunit_structures_denom',
       'n_poverty_determined_native', 'n_poverty_determined_persons',
       'n_poverty_determined_white', 'n_employed_over_16',
       'n_total_families', 'n_foreign_born_pop',
       'n_female_headed_families', 'n_filipino_persons',
       'n_female_labor_force', 'n_german_pop', 'n_german_born_pop',
       'n_household_recent_move', 'n_structures_30_old',
       'n_hawaiian_persons', 'n_total_households', 'n_asian_households',
       'n_black_households', 'n_hispanic_households',
       'n_white_households', 'median_household_income',
       'median_income_asianhh', 'median_income_blackhh',
       'median_income_hispanichh', 'median_income_whitehh',
       'n_hispanic_persons', 'n_edu_hs_less', 'n_total_housing_units',
       'per_capita_income', 'n_asian_indian_persons', 'n_irish_pop',
       'n_irish_born_pop', 'n_italian_pop', 'n_italian_born_pop',
       'n_japanese_persons', 'n_korean_persons', 'n_limited_english',
       'n_employed_manufacturing', 'n_married', 'n_mexican_pop',
       'median_home_value', 'median_contract_rent',
       'n_housing_units_multiunit_structures', 'n_recent_immigrant_pop',
       'n_poverty_over_65', 'n_poverty_asian', 'n_naturalized_pop',
       'n_poverty_black', 'n_poverty_families_children',
       'n_nonhisp_black_persons', 'n_poverty_hispanic',
       'n_nonhisp_white_persons', 'n_poverty_native', 'n_poverty_persons',
       'n_native_persons', 'n_poverty_white', 'n_occupied_housing_units',
       'n_other_language', 'n_owner_occupied_housing_units',
       'p_recent_immigrant_pop', 'p_household_recent_move',
       'p_asian_under_15', 'p_black_under_15', 'p_hispanic_under_15',
       'p_native_under_15', 'p_white_under_15', 'p_persons_under_18',
       'p_structures_30_old', 'p_persons_over_60', 'p_asian_over_65',
       'p_black_over_65', 'p_hispanic_over_65', 'p_native_over_65',
       'p_poverty_rate_over_65', 'p_white_over_65', 'p_persons_over_75',
       'p_poverty_rate_asian', 'p_asian_persons', 'p_poverty_rate_black',
       'p_chinese_persons', 'p_edu_college_greater', 'p_cuban_pop',
       'p_foreign_born_pop', 'p_female_headed_families',
       'p_filipino_persons', 'p_female_labor_force',
       'p_poverty_rate_children', 'p_german_pop', 'p_german_born_pop',
       'p_hawaiian_persons', 'p_hispanic_persons',
       'p_poverty_rate_hispanic', 'p_edu_hs_less',
       'p_asian_indian_persons', 'p_irish_pop', 'p_irish_born_pop',
       'p_italian_pop', 'p_italian_born_pop', 'p_japanese_persons',
       'p_korean_persons', 'p_limited_english',
       'p_employed_manufacturing', 'p_married', 'p_mexican_pop',
       'p_housing_units_multiunit_structures', 'p_poverty_rate_native',
       'p_naturalized_pop', 'p_nonhisp_black_persons', 'p_black_persons',
       'p_native_persons', 'p_other_language', 'n_total_pop',
       'p_owner_occupied_units', 'p_poverty_rate', 'p_puerto_rican_pop',
       'p_employed_professional', 'n_puerto_rican_pop',
       'n_employed_professional', 'p_russian_pop', 'p_russian_born_pop',
       'p_scandanavian_pop', 'p_scandanavian_born_pop',
       'p_employed_self_employed', 'p_unemployment_rate',
       'p_vacant_housing_units', 'p_veterans', 'p_vietnamese_persons',
       'p_widowed_divorced', 'p_poverty_rate_white',
       'n_renter_occupied_housing_units', 'n_russian_pop',
       'n_russian_born_pop', 'n_scandaniavian_pop',
       'n_scandaniavian__born_pop', 'n_employed_self_employed',
       'n_unemployed_persons', 'n_vacant_housing_units', 'n_veterans',
       'n_vietnamese_persons', 'n_widowed_divorced', 'n_white_persons',
       'year', 'n_total_housing_units_sample', 'p_nonhisp_white_persons',
       'p_white_over_60', 'p_black_over_60', 'p_hispanic_over_60',
       'p_native_over_60', 'p_asian_over_60', 'p_disabled', 'geometry'],
      dtype=object)
Code
gdf.n_total_pop
0       5497.0
1       5659.0
2       4486.0
3       2924.0
4       3415.0
         ...  
4575    3672.0
4576    5257.0
4577    6765.0
4578    2981.0
4579    3994.0
Name: n_total_pop, Length: 4580, dtype: float64
Code
gdf.geometry
0       POLYGON ((-118.4487 34.16485, -118.43997 34.16...
1       POLYGON ((-118.56229 34.22033, -118.55792 34.2...
2       POLYGON ((-118.57976 34.21558, -118.57539 34.2...
3       POLYGON ((-118.61472 34.21952, -118.61039 34.2...
4       POLYGON ((-118.25416 33.93882, -118.25413 33.9...
                              ...                        
4575    POLYGON ((-118.50373 34.42608, -118.5005 34.42...
4576    POLYGON ((-118.20731 33.90754, -118.20641 33.9...
4577    POLYGON ((-119.22134 34.1813, -119.21727 34.18...
4578    POLYGON ((-116.51068 33.80502, -116.51069 33.8...
4579    POLYGON ((-118.41378 34.1794, -118.4116 34.179...
Name: geometry, Length: 4580, dtype: geometry
Code
gdf.plot()

Projections

Code
gdf.crs
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
Code
gdf1 = gdf.to_crs(3857)
Code
gdf1.crs
<Projected CRS: EPSG:3857>
Name: WGS 84 / Pseudo-Mercator
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: World between 85.06°S and 85.06°N.
- bounds: (-180.0, -85.06, 180.0, 85.06)
Coordinate Operation:
- name: Popular Visualisation Pseudo-Mercator
- method: Popular Visualisation Pseudo Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
Code
gdf1.plot()

Code
gdf.median_home_value
0       647272.659176
1       400842.977528
2       416741.666667
3       406178.838951
4       251438.857678
            ...      
4575    291838.951311
4576    273871.254682
4577    293254.588015
4578    255794.662921
4579    581717.790262
Name: median_home_value, Length: 4580, dtype: float64
Code
gdf.plot(column='median_home_value')

Code
gdf.plot(column='median_home_value', legend=True)

Code
gdf.plot(column='median_home_value', legend=True,
        scheme='quantiles', k=10)

Code
gdf.plot(column='p_hispanic_persons', legend=True,
        scheme='quantiles', k=10)

Code
gdf.explore(column='p_hispanic_persons', tooltip=['geoid', 'p_hispanic_persons'])
Make this Notebook Trusted to load map: File -> Trust Notebook
Code
gdf.head()
geoid n_asian_under_15 n_black_under_15 n_hispanic_under_15 n_native_under_15 n_white_under_15 n_persons_under_18 n_asian_over_60 n_black_over_60 n_hispanic_over_60 ... year n_total_housing_units_sample p_nonhisp_white_persons p_white_over_60 p_black_over_60 p_hispanic_over_60 p_native_over_60 p_asian_over_60 p_disabled geometry
0 06037128702 58.0 0.0 223.0 0.0 475.0 986.0 NaN NaN NaN ... 2010 2903.0 64.726214 NaN NaN NaN NaN NaN NaN POLYGON ((-118.4487 34.16485, -118.43997 34.16...
1 06037131600 83.0 62.0 777.0 0.0 135.0 1355.0 NaN NaN NaN ... 2010 1487.0 28.679979 NaN NaN NaN NaN NaN NaN POLYGON ((-118.56229 34.22033, -118.55792 34.2...
2 06037134104 287.0 17.0 816.0 0.0 61.0 1323.0 NaN NaN NaN ... 2010 1388.0 14.846188 NaN NaN NaN NaN NaN NaN POLYGON ((-118.57976 34.21558, -118.57539 34.2...
3 06037134304 90.0 24.0 298.0 0.0 89.0 520.0 NaN NaN NaN ... 2010 928.0 33.378933 NaN NaN NaN NaN NaN NaN POLYGON ((-118.61472 34.21952, -118.61039 34.2...
4 06037242000 0.0 229.0 681.0 0.0 0.0 1164.0 NaN NaN NaN ... 2010 1054.0 0.058565 NaN NaN NaN NaN NaN NaN POLYGON ((-118.25416 33.93882, -118.25413 33.9...

5 rows × 194 columns

Code
county = gdf.geoid.str[:5]
Code
county
0       06037
1       06037
2       06037
3       06037
4       06037
        ...  
4575    06037
4576    06037
4577    06111
4578    06065
4579    06037
Name: geoid, Length: 4580, dtype: object
Code
gdf['county'] = county

Which county has the most tracts?

Code
gdf.groupby(by='county').count()
geoid n_asian_under_15 n_black_under_15 n_hispanic_under_15 n_native_under_15 n_white_under_15 n_persons_under_18 n_asian_over_60 n_black_over_60 n_hispanic_over_60 ... year n_total_housing_units_sample p_nonhisp_white_persons p_white_over_60 p_black_over_60 p_hispanic_over_60 p_native_over_60 p_asian_over_60 p_disabled geometry
county
06025 31 31 31 31 31 31 31 0 0 0 ... 31 31 31 0 0 0 0 0 0 31
06037 2344 2344 2344 2344 2344 2344 2344 0 0 0 ... 2344 2344 2328 0 0 0 0 0 0 2344
06059 582 582 582 582 582 582 582 0 0 0 ... 582 582 582 0 0 0 0 0 0 582
06065 453 453 453 453 453 453 453 0 0 0 ... 453 453 452 0 0 0 0 0 0 453
06071 369 369 369 369 369 369 369 0 0 0 ... 369 369 368 0 0 0 0 0 0 369
06073 627 627 627 627 627 627 627 0 0 0 ... 627 627 627 0 0 0 0 0 0 627
06111 174 174 174 174 174 174 174 0 0 0 ... 174 174 173 0 0 0 0 0 0 174

7 rows × 194 columns

Which county has the largest tract in area?

Code
# Take tract areas from gdf1, the copy reprojected to EPSG:3857 (units: metres),
# so the values are in square metres of that projection rather than in degrees.
# NOTE(review): Web Mercator is not an equal-area projection, so these values
# overstate true ground area at this latitude — consider reprojecting to an
# equal-area CRS before using them for densities.
gdf['area'] = gdf1.area
Code
gdf[['area', 'county']].sort_values(by='area', ascending=False)
area county
489 2.700979e+10 06071
437 1.430813e+10 06065
908 1.073840e+10 06071
4303 8.553053e+09 06025
1661 5.345018e+09 06071
... ... ...
715 1.237570e+05 06037
3562 1.161974e+05 06037
4062 9.984444e+04 06037
1669 4.045569e+03 06037
3673 2.537546e-01 06111

4580 rows × 2 columns

Code
gdf.columns.values
array(['geoid', 'n_asian_under_15', 'n_black_under_15',
       'n_hispanic_under_15', 'n_native_under_15', 'n_white_under_15',
       'n_persons_under_18', 'n_asian_over_60', 'n_black_over_60',
       'n_hispanic_over_60', 'n_native_over_60', 'n_persons_over_60',
       'n_white_over_60', 'n_asian_over_65', 'n_black_over_65',
       'n_hispanic_over_65', 'n_native_over_65', 'n_white_over_65',
       'n_persons_over_75', 'n_persons_over_15', 'n_civilians_over_16',
       'n_civilians_over_18', 'n_persons_over_25', 'n_age_5_older',
       'n_asian_age_distribution', 'n_black_age_distribution',
       'n_hispanic_age_distribution', 'n_native_age_distribution',
       'n_white_age_distribution', 'n_asian_persons', 'n_black_persons',
       'n_chinese_persons', 'n_labor_force', 'n_civilians_16_64',
       'n_edu_college_greater', 'n_cuban_pop',
       'n_poverty_determined_asian', 'n_poverty_determined_black',
       'n_total_pop_sample', 'n_female_over_16',
       'n_poverty_determined_families', 'n_poverty_determined_hispanic',
       'n_disabled', 'n_housing_units_multiunit_structures_denom',
       'n_poverty_determined_native', 'n_poverty_determined_persons',
       'n_poverty_determined_white', 'n_employed_over_16',
       'n_total_families', 'n_foreign_born_pop',
       'n_female_headed_families', 'n_filipino_persons',
       'n_female_labor_force', 'n_german_pop', 'n_german_born_pop',
       'n_household_recent_move', 'n_structures_30_old',
       'n_hawaiian_persons', 'n_total_households', 'n_asian_households',
       'n_black_households', 'n_hispanic_households',
       'n_white_households', 'median_household_income',
       'median_income_asianhh', 'median_income_blackhh',
       'median_income_hispanichh', 'median_income_whitehh',
       'n_hispanic_persons', 'n_edu_hs_less', 'n_total_housing_units',
       'per_capita_income', 'n_asian_indian_persons', 'n_irish_pop',
       'n_irish_born_pop', 'n_italian_pop', 'n_italian_born_pop',
       'n_japanese_persons', 'n_korean_persons', 'n_limited_english',
       'n_employed_manufacturing', 'n_married', 'n_mexican_pop',
       'median_home_value', 'median_contract_rent',
       'n_housing_units_multiunit_structures', 'n_recent_immigrant_pop',
       'n_poverty_over_65', 'n_poverty_asian', 'n_naturalized_pop',
       'n_poverty_black', 'n_poverty_families_children',
       'n_nonhisp_black_persons', 'n_poverty_hispanic',
       'n_nonhisp_white_persons', 'n_poverty_native', 'n_poverty_persons',
       'n_native_persons', 'n_poverty_white', 'n_occupied_housing_units',
       'n_other_language', 'n_owner_occupied_housing_units',
       'p_recent_immigrant_pop', 'p_household_recent_move',
       'p_asian_under_15', 'p_black_under_15', 'p_hispanic_under_15',
       'p_native_under_15', 'p_white_under_15', 'p_persons_under_18',
       'p_structures_30_old', 'p_persons_over_60', 'p_asian_over_65',
       'p_black_over_65', 'p_hispanic_over_65', 'p_native_over_65',
       'p_poverty_rate_over_65', 'p_white_over_65', 'p_persons_over_75',
       'p_poverty_rate_asian', 'p_asian_persons', 'p_poverty_rate_black',
       'p_chinese_persons', 'p_edu_college_greater', 'p_cuban_pop',
       'p_foreign_born_pop', 'p_female_headed_families',
       'p_filipino_persons', 'p_female_labor_force',
       'p_poverty_rate_children', 'p_german_pop', 'p_german_born_pop',
       'p_hawaiian_persons', 'p_hispanic_persons',
       'p_poverty_rate_hispanic', 'p_edu_hs_less',
       'p_asian_indian_persons', 'p_irish_pop', 'p_irish_born_pop',
       'p_italian_pop', 'p_italian_born_pop', 'p_japanese_persons',
       'p_korean_persons', 'p_limited_english',
       'p_employed_manufacturing', 'p_married', 'p_mexican_pop',
       'p_housing_units_multiunit_structures', 'p_poverty_rate_native',
       'p_naturalized_pop', 'p_nonhisp_black_persons', 'p_black_persons',
       'p_native_persons', 'p_other_language', 'n_total_pop',
       'p_owner_occupied_units', 'p_poverty_rate', 'p_puerto_rican_pop',
       'p_employed_professional', 'n_puerto_rican_pop',
       'n_employed_professional', 'p_russian_pop', 'p_russian_born_pop',
       'p_scandanavian_pop', 'p_scandanavian_born_pop',
       'p_employed_self_employed', 'p_unemployment_rate',
       'p_vacant_housing_units', 'p_veterans', 'p_vietnamese_persons',
       'p_widowed_divorced', 'p_poverty_rate_white',
       'n_renter_occupied_housing_units', 'n_russian_pop',
       'n_russian_born_pop', 'n_scandaniavian_pop',
       'n_scandaniavian__born_pop', 'n_employed_self_employed',
       'n_unemployed_persons', 'n_vacant_housing_units', 'n_veterans',
       'n_vietnamese_persons', 'n_widowed_divorced', 'n_white_persons',
       'year', 'n_total_housing_units_sample', 'p_nonhisp_white_persons',
       'p_white_over_60', 'p_black_over_60', 'p_hispanic_over_60',
       'p_native_over_60', 'p_asian_over_60', 'p_disabled', 'geometry',
       'county', 'area'], dtype=object)
Code
gdf.n_total_pop_sample
0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
        ..
4575   NaN
4576   NaN
4577   NaN
4578   NaN
4579   NaN
Name: n_total_pop_sample, Length: 4580, dtype: float64

Geopandas

  • GeoSeries
  • GeoDataFrame
  • County variable
  • area/crs/project
  • population density

Studio

  • Report number of tracts by county
  • Median tract population density by county
  • Population density by county
    • two group bys and division
    • later show them dissolve